Putting Visual Analytics into Practical Use
packages = c('tidyverse', 'ggrepel', 'ggiraph', 'patchwork', 'plotly', 'hrbrthemes',
'heatmaply', 'viridis','ggridges','binr', "trelliscopejs","zoo","ggthemes")
for(p in packages){
if(!require(p, character.only = T)){
install.packages(p)
}
library(p, character.only = T)
}
financeJ <- readRDS(file = "data/financeJ.rds")
participantsD <- readRDS(file = "data/participantsD.rds")
head (financeJ)
# A tibble: 6 × 4
participantId timestamp category amount
<dbl> <dttm> <chr> <dbl>
1 0 2022-03-01 00:00:00 Wage 2473.
2 0 2022-03-01 00:00:00 Shelter -555.
3 0 2022-03-01 00:00:00 Education -38.0
4 1 2022-03-01 00:00:00 Wage 2047.
5 1 2022-03-01 00:00:00 Shelter -555.
6 1 2022-03-01 00:00:00 Education -38.0
head (participantsD)
# A tibble: 6 × 7
participantId householdSize haveKids age educationLevel
<dbl> <dbl> <lgl> <dbl> <chr>
1 0 3 TRUE 36 HighSchoolOrCollege
2 1 3 TRUE 25 HighSchoolOrCollege
3 2 3 TRUE 35 HighSchoolOrCollege
4 3 3 TRUE 21 HighSchoolOrCollege
5 4 3 TRUE 43 Bachelors
6 5 3 TRUE 32 HighSchoolOrCollege
# … with 2 more variables: interestGroup <chr>, joviality <dbl>
cuts <- bins(participantsD$age,target.bins = 7,minpts = 120)
cuts$breaks <- bins.getvals(cuts)
cuts$binct
[18, 23] [24, 29] [30, 35] [36, 42] [43, 48] [49, 54] [55, 60]
136 137 154 153 136 156 139
age_group1 <- 18:23 #create a vector of age
age_group2 <- 24:29
age_group3 <- 30:35
age_group4 <- 36:42
age_group5 <- 43:48
age_group6 <- 49:54
age_group7 <- 55:60
participants_final <- participantsD %>%
mutate (agegroup= case_when(
age %in% age_group1 ~ "18-23",
age %in% age_group2 ~ "24-29",
age %in% age_group3 ~ "30-35",
age %in% age_group4 ~ "36-42",
age %in% age_group5 ~ "43-48",
age %in% age_group6 ~ "49-54",
age %in% age_group7 ~ "55-60")) %>%
select (participantId,agegroup)
data_plot <- final %>%
select(c("Timestamp","Participant_ID", "category","amount")) %>% #choose the columns to subset
group_by(Timestamp,category,Participant_ID)%>%
summarise(amount = round(sum(amount),2)) %>%
pivot_wider(names_from = "category",values_from = "amount")
data_plot[is.na(data_plot)] = 0
data_plot [3:8] <- abs(data_plot[3:8])
data_plot <- data_plot %>%
mutate(Expenses = (Education + Food + Recreation + Shelter))
trelis_plot <- data_plot %>%
select(c("Timestamp","Participant_ID","Expenses", "Wage"))
ggplot(trelis_plot, aes(y = Expenses, x = (Timestamp))) +
geom_point() + geom_line() +
labs (y = "Total Expenses \n of the Month", x = 'Month', title = "Monthly Expenditure of Participant") +
scale_x_date(date_breaks = "months",
date_labels = "%b") +
facet_trelliscope(~ Participant_ID, nrow = 2, ncol = 2, width = 800, as_plotly = TRUE, path = "trellis/", self_contained = TRUE)
ggplot(trelis_plot, aes(y = Wage, x = (Timestamp))) +
geom_point() + geom_line() +
labs (y = "Total Wage \n of the Month", x = 'Month', title = "Monthly Wage of Participant") +
scale_x_date(date_breaks = "months",
date_labels = "%b") +
facet_trelliscope(~ Participant_ID, nrow = 2, ncol = 2, width = 800, as_plotly = TRUE, path = "trellis1/")
histo_plot <- final %>%
select(c("Participant_ID","category","amount","agegroup")) %>%
group_by(Participant_ID,category,agegroup) %>%
summarise (amount = round(sum(amount),2)) %>%
pivot_wider(names_from = "category",values_from = "amount")
histo_plot[is.na(histo_plot)] = 0
histo_plot [3:8] <- abs(histo_plot[3:8])
histo_plot <- histo_plot %>%
mutate(Expenses = Education + Food + Recreation + Shelter) %>%
mutate(Income = RentAdjustment + Wage) %>%
mutate (Savings = Income - Expenses) %>%
mutate (PctSavings = round((Savings/Income) *100,0)) %>%
mutate(PctNeedsExpenses = round(((Education+Food+Shelter)/Income) *100,0))
p <- ggplot(data=histo_plot,
aes(x = PctSavings)) +
geom_vline(aes(xintercept = 20.5), color="red", linetype = "dashed", alpha = 0.3) +
annotate ("text", x=9, y = 0.5, label = "Participants with less than \n20% of Savings", size = 2.5, angle = 0,) +
geom_dotplot_interactive(
aes(tooltip = agegroup,
data_id = agegroup),
stackgroups = TRUE,
binwidth = 1,
method = "histodot") +
scale_y_continuous(NULL, breaks = NULL) +
scale_x_continuous(labels = c("0","20","40","60", "80", "100"),breaks = c(0,20,40,60,80,100)) +
theme_few() + labs (title = "Participants Savings in Percentage", x = "Savings (%)")
p2 <- ggplot(data=histo_plot,
aes(x = PctNeedsExpenses)) +
geom_vline(aes(xintercept = 49.5), color="blue", linetype = "dashed", alpha = 0.3) +
annotate ("text", x=70, y = 0.5, label = "Participants that spend more than \n50% of income on Needs", size = 2.5, angle = 0,) +
geom_dotplot_interactive(
aes(tooltip = agegroup,
data_id = agegroup),
stackgroups = TRUE,
binwidth = 1,
method = "histodot") +
scale_y_continuous(NULL, breaks = NULL) +
scale_x_continuous(labels = c("0","20","40","60", "80", "100"),breaks = c(0,20,40,60,80,100), limits = c(0,100)) +
theme_few() + labs (title = "Participants Expenses (Needs) in Percentage", x = "Expenses (%)")
girafe(
code = print (p/p2),
width_svg = 8,
height_svg = 8)